#!/usr/bin/env python
from __future__ import print_function
import argparse  # command-line argument parsing
# scikit-image is an open-source image-processing library for Python.
# It provides algorithms for segmentation, geometric transforms, color-space
# manipulation, analysis, filtering, morphology, feature detection, and more.
import skimage
from skimage import transform, color, exposure

import sys
sys.path.append("game/")
import wrapped_flappy_bird as game
import random
import numpy as np
from collections import deque

import json
# Keras (1.x API: Convolution2D with subsample/border_mode)
from keras.models import Sequential  # a Sequential model is a linear stack of layers
from keras.layers.core import Dense, Activation, Flatten
from keras.layers.convolutional import Convolution2D
from keras.optimizers import Adam
import tensorflow as tf

GAME = 'bird' # the name of the game being played for log files
CONFIG = 'nothreshold'
ACTIONS = 2 # number of valid actions
GAMMA = 0.99 # discount factor for future rewards
OBSERVATION = 300. # timesteps to observe before training
EXPLORE = 3000000. # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001 # final value of epsilon
INITIAL_EPSILON = 0.1 # starting value of epsilon
REPLAY_MEMORY = 50000 # number of previous transitions to remember
BATCH = 32 # size of minibatch
FRAME_PER_ACTION = 1
LEARNING_RATE = 1e-4
# input image size in pixels
img_rows, img_cols = 80, 80
# frames are converted to grayscale, so each frame is one channel
img_channels = 4 # we stack the last 4 frames as channels

# build the Q-network with Keras
def buildmodel():
    print("Now we build the model")
    model = Sequential()
    # Keras 1.x: subsample = strides, border_mode = padding
    model.add(Convolution2D(32, 8, 8, subsample=(4, 4), border_mode='same',
                input_shape=(img_rows, img_cols, img_channels)))  # input is 80x80x4
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 4, 4, subsample=(2, 2), border_mode='same'))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 3, 3, subsample=(1, 1), border_mode='same'))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(ACTIONS))  # one Q-value per action
    adam = Adam(lr=LEARNING_RATE)
    # compile the model: the learning process must be configured before training
    model.compile(loss='mse', optimizer=adam)
    print("We finished building the model")
    return model
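
# Shape trace for reference (assuming the 80x80x4 input and 'same' padding above):
# conv 8x8 stride 4 -> 20x20x32, conv 4x4 stride 2 -> 10x10x64,
# conv 3x3 stride 1 -> 10x10x64, flatten -> 6400, dense -> 512 -> ACTIONS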


def trainNetwork(model, args):
    # open up a game state to communicate with emulator
    game_state = game.GameState()
    # store the previous observations in replay memory
    D = deque()
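    # D is capped at REPLAY_MEMORY entries by the popleft() below;
    # deque(maxlen=REPLAY_MEMORY) would achieve the same automatically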
    # get the first state by doing nothing and preprocess the image to 80x80x4
    do_nothing = np.zeros(ACTIONS)  # ACTIONS = 2 number of valid actions
    do_nothing[0] = 1
    x_t, r_0, terminal = game_state.frame_step(do_nothing)
    # image preprocessing
    x_t = skimage.color.rgb2gray(x_t)  # convert the RGB frame to grayscale
    x_t = skimage.transform.resize(x_t, (80, 80))
    x_t = skimage.exposure.rescale_intensity(x_t, out_range=(0, 255))
    x_t = x_t / 255.0  # normalize pixel values to [0, 1]
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
    # Keras needs a leading batch dimension
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  # 1x80x80x4
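    # With no motion history at the start, the four channels of s_t are four
    # identical copies of the first frame; this is the usual way to seed the stack.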
    
    if args['mode'] == 'Run':
        OBSERVE = 999999999    # we keep observing, never train
        epsilon = FINAL_EPSILON
        print("Now we load the weights")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse', optimizer=adam)
        print("Weights loaded successfully")
    else:                       #We go to training mode
        OBSERVE = OBSERVATION
        epsilon = INITIAL_EPSILON
    t = 0
    while True:
        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0
        a_t = np.zeros([ACTIONS])
        # choose an action epsilon-greedily
        if t % FRAME_PER_ACTION == 0:
            if random.random() <= epsilon:
                print("Random Action")
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
            else:
                q = model.predict(s_t)  # input a stack of 4 images, get the prediction
                max_Q = np.argmax(q)
                action_index = max_Q
                a_t[max_Q] = 1
        # reduce epsilon gradually once observation is done
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
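            # linear annealing: (0.1 - 0.0001) / 3e6 ≈ 3.3e-8 per step, so epsilon
            # reaches FINAL_EPSILON after roughly EXPLORE training steps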
        # run the selected action and observe the next state and reward
        x_t1_colored, r_t, terminal = game_state.frame_step(a_t)
        x_t1 = skimage.color.rgb2gray(x_t1_colored)
        x_t1 = skimage.transform.resize(x_t1,(80,80))
        x_t1 = skimage.exposure.rescale_intensity(x_t1, out_range=(0, 255))
        x_t1 = x_t1 / 255.0
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) # 1x80x80x1
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)  # 1x80x80x4
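        # sliding window over frames: prepend the newest frame and drop the
        # oldest (channel index 3), so s_t1 again holds the 4 most recent frames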
        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()
        # only train once done observing
        if t > OBSERVE:
            # sample a minibatch of BATCH transitions to train on
            minibatch = random.sample(D, BATCH)  # list of 32 five-element tuples
            # Now we do the experience replay.
            # zip(*minibatch) unzips the list of tuples into one tuple per field
            state_t, action_t, reward_t, state_t1, terminal = zip(*minibatch)
            # state_t is a tuple of 32 arrays of shape (1, 80, 80, 4);
            # np.concatenate stacks them into a single (32, 80, 80, 4) ndarray
            state_t = np.concatenate(state_t)
            state_t1 = np.concatenate(state_t1)
            # targets and Q_sa both come from model.predict
            targets = model.predict(state_t)  # (32, 2)
            Q_sa = model.predict(state_t1)    # (32, 2)
            targets[range(BATCH), action_t] = reward_t + GAMMA * np.max(Q_sa, axis=1) * np.invert(terminal)
            # run a single gradient update on this batch of samples
            # state_t: (32, 80, 80, 4), targets: (32, 2)
            loss += model.train_on_batch(state_t, targets)
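            # The indexed assignment above implements the Q-learning (Bellman) target:
            #   y = r_t                              if the transition is terminal
            #   y = r_t + GAMMA * max_a Q(s_t1, a)   otherwise
            # np.invert(terminal) is False (0) for terminal transitions, which
            # zeroes out the bootstrap term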

        s_t = s_t1
        t = t + 1
        # save progress every 1000 iterations
        if t % 1000 == 0:
            print("Now we save model")
            model.save_weights("model.h5", overwrite=True)
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"
        print("TIMESTEP", t, "/ STATE", state, \
            "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, \
            "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)

    print("Episode finished!")


def playGame(args):
    model = buildmodel()
    trainNetwork(model, args)


def main():
    parser = argparse.ArgumentParser(description='Train or run a DQN agent for Flappy Bird')
    parser.add_argument('-m','--mode', help='Train / Run', default='Train')
    args = vars(parser.parse_args())
    playGame(args)


if __name__ == "__main__":
    # TF 1.x: ConfigProto configures the session at creation time
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # allocate GPU memory on demand
    sess = tf.Session(config=config)
    # hand the session to Keras, which serves as a simplified TensorFlow interface
    from keras import backend as K
    K.set_session(sess)

    main()
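
# Example usage (assuming this file is saved as qlearn.py; the filename is not
# fixed by the script itself):
#   python qlearn.py -m Train   # train from scratch
#   python qlearn.py -m Run     # play using the saved model.h5 weights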
